Addgrad
逐元素计算加法梯度
\[\begin{split}dx1 = \frac{\partial L}{\partial X1} = \frac{\partial L}{\partial Y} * 1 = \frac{\partial L}{\partial Y}\\
dx2 = \frac{\partial L}{\partial X2} = \frac{\partial L}{\partial Y} * 1 = \frac{\partial L}{\partial Y}\end{split}\]
- 输入:
dy - dy数据地址。
- params - 参数按顺序打包成的long long型数组，包含以下各项。
dx1_dim - x1的维度信息。
dx2_dim - x2的维度信息。
dy_dims - dy的维度信息。
num_dims - 维度数
temp_space - 临时空间
core_mask - 核掩码。
- 输出:
dx1 - dx1的数据地址。
dx2 - dx2的数据地址。
- 支持平台:
FT78NE、MT7004
备注
FT78NE 支持fp32
MT7004 支持fp16, fp32
dx1_dim, dx2_dim, dy_dims, num_dims, temp_space按顺序打包成了一个long long型数组（见下方调用示例中的params[0]至params[4]）
共享存储版本:
- void fp_add_grad_s(float *dy, float *dx1, float *dx2, long long *params, int core_mask)
- void hp_add_grad_s(half *dy, half *dx1, half *dx2, long long *params, int core_mask)
C调用示例:
1//FT78NE示例
2#include <stdio.h>
3#include <addgrad.h>
4
5int main(int argc, char* argv[]) {
6 float *dy = (float *)0x81000000;
7 float *dx1 = (float *)0x82000000;
8 float *dx2 = (float *)0x83000000;
9 int *tempspace = (int *)0x84000000;
10 int core_mask = 0b1111;
11
12 int i;
13
14 // same shape
15 int dx1_dims[] = {4, 8, 32}; //1024
16 int dx2_dims[] = {1, 8, 32}; //
17 int dy_dims[] = {4, 8, 32}; //
18 int num_dims = 3;
19
20 int dx1_num = get_total_elements(num_dims, dx1_dims);
21 int dx2_num = get_total_elements(num_dims, dx2_dims);
22 int dy_num = get_total_elements(num_dims, dy_dims);
23
24 for (i = 0; i < dy_num; ++i) {
25 dy[i] = (float)(rand() % 100) / 10.0f;
26 }
27
28 long long params[6];
29 params[0] = (unsigned long long)dx1_dims;
30 params[1] = (unsigned long long)dx2_dims;
31 params[2] = (unsigned long long)dy_dims;
32 params[3] = (unsigned long long)num_dims;
33 params[4] = (unsigned long long)tempspace;
34
35 fp_add_grad_s(dy, dx1, dx2, params, core_mask);
36 return 0;
37}
私有存储版本:
- void fp_add_grad_p(float *dy, float *dx1, float *dx2, long long *params)
- void hp_add_grad_p(half *dy, half *dx1, half *dx2, long long *params)
C调用示例:
1//FT78NE示例
2#include <stdio.h>
3#include <addgrad.h>
4
5int main(int argc, char* argv[]) {
6 float *dy = (float *)0x10010000;
7 float *dx1 = (float *)0x10020000;
8 float *dx2 = (float *)0x10030000;
9 int *tempspace = (int *)0x10040000;
10
11 int i;
12
13 // same shape
14 int dx1_dims[] = {4, 8, 32}; //1024
15 int dx2_dims[] = {1, 8, 32}; //
16 int dy_dims[] = {4, 8, 32}; //
17 int num_dims = 3;
18
19 int dx1_num = get_total_elements(num_dims, dx1_dims);
20 int dx2_num = get_total_elements(num_dims, dx2_dims);
21 int dy_num = get_total_elements(num_dims, dy_dims);
22
23 for (i = 0; i < dy_num; ++i) {
24 dy[i] = (float)(rand() % 100) / 10.0f;
25 }
26
27 long long params[6];
28 params[0] = (unsigned long long)dx1_dims;
29 params[1] = (unsigned long long)dx2_dims;
30 params[2] = (unsigned long long)dy_dims;
31 params[3] = (unsigned long long)num_dims;
32 params[4] = (unsigned long long)tempspace;
33
34 fp_add_grad_p(dy, dx1, dx2, params);
35 return 0;
36}